1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.solr.internal.csv;
18
19 import java.io.IOException;
20 import java.io.Reader;
21 import java.io.StringReader;
22 import java.util.Arrays;
23
24 import junit.framework.TestCase;
25
26
27
28
29
30
31
32
33
34
35 public class CSVParserTest extends TestCase {
36
37
38
39
40 class TestCSVParser extends CSVParser {
41
42
43
44
45 TestCSVParser(Reader in) {
46 super(in);
47 }
48
49 TestCSVParser(Reader in, CSVStrategy strategy) {
50 super(in, strategy);
51 }
52
53
54
55
56
57
58 public String testNextToken() throws IOException {
59 Token t = super.nextToken();
60 return Integer.toString(t.type) + ";" + t.content + ";";
61 }
62 }
63
64
65
66
67
68
69 public void testNextToken1() throws IOException {
70 String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
71 TestCSVParser parser = new TestCSVParser(new StringReader(code));
72 assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
73 assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
74 assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
75 assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken());
76 assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken());
77 assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken());
78 assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
79 assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
80 assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
81 assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
82 }
83
84
85 public void testNextToken2() throws IOException {
86
87
88
89
90
91
92
93 String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
94 CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
95
96 strategy.setCommentStart('#');
97
98 TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
99
100
101 assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
102 assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
103 assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
104 assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
105 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
106 assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken());
107 assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken());
108 assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
109 assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken());
110 assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken());
111 assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
112 assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
113 assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
114
115 }
116
117
118 public void testNextToken3() throws IOException {
119
120
121
122 String code = "a,\\,,b\n\\,,";
123 CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
124 strategy.setCommentStart('#');
125 TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
126
127 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
128
129 assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
130 assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
131 assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
132
133 assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
134 assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
135 assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
136 }
137
138
139 public void testNextToken4() throws IOException {
140
141
142
143
144
145 String code =
146 "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
147 TestCSVParser parser = new TestCSVParser(new StringReader(code));
148 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
149 assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
150 assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
151 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
152 assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken());
153 assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
154 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
155 assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken());
156 assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
157 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
158 assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
159
160 assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
161 }
162
163
164 public void testNextToken5() throws IOException {
165 String code =
166 "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
167 TestCSVParser parser = new TestCSVParser(new StringReader(code));
168 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
169 assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
170 assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
171 assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
172 parser.testNextToken());
173 assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken());
174
175 }
176
177
178 public void testNextToken6() throws IOException {
179
180
181
182
183
184 String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
185 TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!'));
186 assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
187 assertEquals(
188 CSVParser.TT_EORECORD + ";b and ' more\n;",
189 parser.testNextToken());
190 }
191
192
193
194
195
196
197 String code =
198 "a,b,c,d\n"
199 + " a , b , 1 2 \n"
200 + "\"foo baar\", b,\n"
201
202 + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n";
203 String[][] res = {
204 {"a", "b", "c", "d"},
205 {"a", "b", "1 2"},
206 {"foo baar", "b", ""},
207 {"foo\n,,\n\",,\n\"", "d", "e"}
208 };
209 public void testGetLine() throws IOException {
210 CSVParser parser = new CSVParser(new StringReader(code));
211 String[] tmp = null;
212 for (int i = 0; i < res.length; i++) {
213 tmp = parser.getLine();
214 assertTrue(Arrays.equals(res[i], tmp));
215 }
216 tmp = parser.getLine();
217 assertTrue(tmp == null);
218 }
219
220 public void testNextValue() throws IOException {
221 CSVParser parser = new CSVParser(new StringReader(code));
222 String tmp = null;
223 for (int i = 0; i < res.length; i++) {
224 for (int j = 0; j < res[i].length; j++) {
225 tmp = parser.nextValue();
226 assertEquals(res[i][j], tmp);
227 }
228 }
229 tmp = parser.nextValue();
230 assertTrue(tmp == null);
231 }
232
233 public void testGetAllValues() throws IOException {
234 CSVParser parser = new CSVParser(new StringReader(code));
235 String[][] tmp = parser.getAllValues();
236 assertEquals(res.length, tmp.length);
237 assertTrue(tmp.length > 0);
238 for (int i = 0; i < res.length; i++) {
239 assertTrue(Arrays.equals(res[i], tmp[i]));
240 }
241 }
242
243 public void testExcelStrategy1() throws IOException {
244 String code =
245 "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
246 + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
247 String[][] res = {
248 {"value1", "value2", "value3", "value4"},
249 {"a", "b", "c", "d"},
250 {" x", "", "", ""},
251 {""},
252 {"\"hello\"", " \"world\"", "abc\ndef", ""}
253 };
254 CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
255 String[][] tmp = parser.getAllValues();
256 assertEquals(res.length, tmp.length);
257 assertTrue(tmp.length > 0);
258 for (int i = 0; i < res.length; i++) {
259 assertTrue(Arrays.equals(res[i], tmp[i]));
260 }
261 }
262
263 public void testExcelStrategy2() throws Exception {
264 String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
265 String[][] res = {
266 {"foo", "baar"},
267 {""},
268 {"hello", ""},
269 {""},
270 {"world", ""}
271 };
272 CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
273 String[][] tmp = parser.getAllValues();
274 assertEquals(res.length, tmp.length);
275 assertTrue(tmp.length > 0);
276 for (int i = 0; i < res.length; i++) {
277 assertTrue(Arrays.equals(res[i], tmp[i]));
278 }
279 }
280
281 public void testEndOfFileBehaviourExcel() throws Exception {
282 String[] codes = {
283 "hello,\r\n\r\nworld,\r\n",
284 "hello,\r\n\r\nworld,",
285 "hello,\r\n\r\nworld,\"\"\r\n",
286 "hello,\r\n\r\nworld,\"\"",
287 "hello,\r\n\r\nworld,\n",
288 "hello,\r\n\r\nworld,",
289 "hello,\r\n\r\nworld,\"\"\n",
290 "hello,\r\n\r\nworld,\"\""
291 };
292 String[][] res = {
293 {"hello", ""},
294 {""},
295 {"world", ""}
296 };
297 String code;
298 for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
299 code = codes[codeIndex];
300 CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
301 String[][] tmp = parser.getAllValues();
302 assertEquals(res.length, tmp.length);
303 assertTrue(tmp.length > 0);
304 for (int i = 0; i < res.length; i++) {
305 assertTrue(Arrays.equals(res[i], tmp[i]));
306 }
307 }
308 }
309
310 public void testEndOfFileBehaviorCSV() throws Exception {
311 String[] codes = {
312 "hello,\r\n\r\nworld,\r\n",
313 "hello,\r\n\r\nworld,",
314 "hello,\r\n\r\nworld,\"\"\r\n",
315 "hello,\r\n\r\nworld,\"\"",
316 "hello,\r\n\r\nworld,\n",
317 "hello,\r\n\r\nworld,",
318 "hello,\r\n\r\nworld,\"\"\n",
319 "hello,\r\n\r\nworld,\"\""
320 };
321 String[][] res = {
322 {"hello", ""},
323 {"world", ""}
324 };
325 String code;
326 for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
327 code = codes[codeIndex];
328 CSVParser parser = new CSVParser(new StringReader(code));
329 String[][] tmp = parser.getAllValues();
330 assertEquals(res.length, tmp.length);
331 assertTrue(tmp.length > 0);
332 for (int i = 0; i < res.length; i++) {
333 assertTrue(Arrays.equals(res[i], tmp[i]));
334 }
335 }
336 }
337
338 public void testEmptyLineBehaviourExcel() throws Exception {
339 String[] codes = {
340 "hello,\r\n\r\n\r\n",
341 "hello,\n\n\n",
342 "hello,\"\"\r\n\r\n\r\n",
343 "hello,\"\"\n\n\n"
344 };
345 String[][] res = {
346 {"hello", ""},
347 {""},
348 {""}
349 };
350 String code;
351 for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
352 code = codes[codeIndex];
353 CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
354 String[][] tmp = parser.getAllValues();
355 assertEquals(res.length, tmp.length);
356 assertTrue(tmp.length > 0);
357 for (int i = 0; i < res.length; i++) {
358 assertTrue(Arrays.equals(res[i], tmp[i]));
359 }
360 }
361 }
362
363 public void testEmptyLineBehaviourCSV() throws Exception {
364 String[] codes = {
365 "hello,\r\n\r\n\r\n",
366 "hello,\n\n\n",
367 "hello,\"\"\r\n\r\n\r\n",
368 "hello,\"\"\n\n\n"
369 };
370 String[][] res = {
371 {"hello", ""}
372 };
373 String code;
374 for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
375 code = codes[codeIndex];
376 CSVParser parser = new CSVParser(new StringReader(code));
377 String[][] tmp = parser.getAllValues();
378 assertEquals(res.length, tmp.length);
379 assertTrue(tmp.length > 0);
380 for (int i = 0; i < res.length; i++) {
381 assertTrue(Arrays.equals(res[i], tmp[i]));
382 }
383 }
384 }
385
386 public void OLDtestBackslashEscaping() throws IOException {
387 String code =
388 "one,two,three\n"
389 + "on\\\"e,two\n"
390 + "on\"e,two\n"
391 + "one,\"tw\\\"o\"\n"
392 + "one,\"t\\,wo\"\n"
393 + "one,two,\"th,ree\"\n"
394 + "\"a\\\\\"\n"
395 + "a\\,b\n"
396 + "\"a\\\\,b\"";
397 String[][] res = {
398 { "one", "two", "three" },
399 { "on\\\"e", "two" },
400 { "on\"e", "two" },
401 { "one", "tw\"o" },
402 { "one", "t\\,wo" },
403 { "one", "two", "th,ree" },
404 { "a\\\\" },
405 { "a\\", "b" },
406 { "a\\\\,b" }
407 };
408 CSVParser parser = new CSVParser(new StringReader(code));
409 String[][] tmp = parser.getAllValues();
410 assertEquals(res.length, tmp.length);
411 assertTrue(tmp.length > 0);
412 for (int i = 0; i < res.length; i++) {
413 assertTrue(Arrays.equals(res[i], tmp[i]));
414 }
415 }
416
417 public void testBackslashEscaping() throws IOException {
418
419
420
421
422
423 String code =
424 "one,two,three\n"
425 + "'',''\n"
426 + "/',/'\n"
427 + "'/'','/''\n"
428 + "'''',''''\n"
429 + "/,,/,\n"
430 + "//,//\n" // 6) escape escaped
431 + "'//','//'\n" // 7) escape escaped in encapsulation
432 + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
433 + "9, /\n \n"
434 + "";
435 String[][] res = {
436 { "one", "two", "three" },
437 { "", "" },
438 { "'", "'" },
439 { "'", "'" },
440 { "'", "'" },
441 { ",", "," },
442 { "/", "/" },
443 { "/", "/" },
444 { " 8 ", " \"quoted \"\" \" / string\" " },
445 { "9", " \n " },
446 };
447
448
449 CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
450
451 CSVParser parser = new CSVParser(new StringReader(code), strategy);
452 String[][] tmp = parser.getAllValues();
453 assertTrue(tmp.length > 0);
454 for (int i = 0; i < res.length; i++) {
455 assertTrue(Arrays.equals(res[i], tmp[i]));
456 }
457 }
458
459 public void testBackslashEscaping2() throws IOException {
460
461
462
463
464
465 String code = ""
466 + " , , \n"
467 + " \t , , \n"
468 + " // , /, , /,\n" // 3)
469 + "";
470 String[][] res = {
471 { " ", " ", " " },
472 { " \t ", " ", " " },
473 { " / ", " , ", " ," },
474 };
475
476
477 CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
478
479 CSVParser parser = new CSVParser(new StringReader(code), strategy);
480 String[][] tmp = parser.getAllValues();
481 assertTrue(tmp.length > 0);
482
483 if (!CSVPrinterTest.equals(res, tmp)) {
484 assertTrue(false);
485 }
486
487 }
488
489
490 public void testDefaultStrategy() throws IOException {
491
492 String code = ""
493 + "a,b\n"
494 + "\"\n\",\" \"\n"
495 + "\"\",#\n"
496 ;
497 String[][] res = {
498 { "a", "b" },
499 { "\n", " " },
500 { "", "#" },
501 };
502
503 CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
504 assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());
505
506 CSVParser parser = new CSVParser(new StringReader(code), strategy);
507 String[][] tmp = parser.getAllValues();
508 assertTrue(tmp.length > 0);
509
510 if (!CSVPrinterTest.equals(res, tmp)) {
511 assertTrue(false);
512 }
513
514 String[][] res_comments = {
515 { "a", "b" },
516 { "\n", " " },
517 { ""},
518 };
519
520 strategy = new CSVStrategy(',','"','#');
521 parser = new CSVParser(new StringReader(code), strategy);
522 tmp = parser.getAllValues();
523
524 if (!CSVPrinterTest.equals(res_comments, tmp)) {
525 assertTrue(false);
526 }
527 }
528
529
530 public void testUnicodeEscape() throws IOException {
531 String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
532 CSVParser parser = new CSVParser(new StringReader(code));
533 parser.getStrategy().setUnicodeEscapeInterpretation(true);
534 String[] data = parser.getLine();
535 assertEquals(2, data.length);
536 assertEquals("abc", data[0]);
537 assertEquals("public", data[1]);
538 }
539
540 public void testCarriageReturnLineFeedEndings() throws IOException {
541 String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
542 CSVParser parser = new CSVParser(new StringReader(code));
543 String[][] data = parser.getAllValues();
544 assertEquals(4, data.length);
545 }
546
547 public void testIgnoreEmptyLines() throws IOException {
548 String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
549
550
551 CSVParser parser = new CSVParser(new StringReader(code));
552 String[][] data = parser.getAllValues();
553 assertEquals(3, data.length);
554 }
555
556 public void testLineTokenConsistency() throws IOException {
557 String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
558 CSVParser parser = new CSVParser(new StringReader(code));
559 String[][] data = parser.getAllValues();
560 parser = new CSVParser(new StringReader(code));
561 CSVParser parser1 = new CSVParser(new StringReader(code));
562 for (int i = 0; i < data.length; i++) {
563 assertTrue(Arrays.equals(parser1.getLine(), data[i]));
564 for (int j = 0; j < data[i].length; j++) {
565 assertEquals(parser.nextValue(), data[i][j]);
566 }
567 }
568 }
569
570
571 public void testDelimiterIsWhitespace() throws IOException {
572 String code = "one\ttwo\t\tfour \t five\t six";
573 TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY);
574 assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
575 assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
576 assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
577 assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken());
578 assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken());
579 assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken());
580 }
581 }